home *** CD-ROM | disk | FTP | other *** search
Text File | 1997-06-19 | 3.7 KB | 150 lines | [TEXT/CWIE] |
- /*------------------------------------------------------------------------------
- #
- # NewsTicker, my Hack for 1997
- #
- # CNNExtractor - Derived from HTMLExtractor; it is passed the tokens
- # of the page and tries to recognize the headlines among them.
- # We parse the page "www.cnn.com", the news page of CNN
- #
- ------------------------------------------------------------------------------*/
-
- #include "TickerGlobals.h"
-
- #include "CNNExtractor.h"
- #include "HTMLExtractor.h"
-
-
// CNN refresh settings

#define kCNNAddress "www.cnn.com"   // host name handed to the HTMLExtractor constructor
#define kCNNPeriod 1200             // seconds between refreshes, i.e. 20 minutes

// Time (in GetDateTime seconds) at which the page is next due for a reload.
// Starts at 0, so the first MustReloadCNN check reports that a load is due;
// constructing a CNNExtractor moves it to "now + kCNNPeriod".
long gCNNNextTime = 0;
-
- class CNNExtractor: public HTMLExtractor
- {
- protected:
- enum CNNParser { kncParsing,
- //text headlines are <a>headline</a>
- kncHasLink,
- //Some big stories are <h2>text</h2>(other><a>
- kncHasHeader, kncHasNotHeader };
-
- CNNParser mfCurrentState;
- Str255 mfTheURL;
- Str255 mfTheSubject;
-
- public:
- CNNExtractor(sMyDataPtr theDataPtr);
- virtual ~CNNExtractor (void){ }
-
- virtual void HandleToken(char* string, short numchars, Boolean isCommand);
- };
-
- //
- // We just parse the entries to find the element
- //
- CNNExtractor::CNNExtractor(sMyDataPtr theDataPtr)
- :HTMLExtractor(kCNNAddress, 1003, theDataPtr)
- {
- unsigned long now;
-
- mfCurrentState = kncParsing; //just waiting for our thing to come through
-
- GetDateTime(&now);
- gCNNNextTime = now + kCNNPeriod; //refresh the news every 20 minutes
- }
-
- void CNNExtractor::HandleToken(char* string, short numchars, Boolean isCommand)
- {
- if (isCommand)
- {
- switch (mfCurrentState)
- {
- case kncParsing: //from nothing, we want H2 or A
- if (MyCompareStr(string, "<H2>"))
- {
- mfCurrentState = kncHasHeader;
- mfTheSubject[0] = 0;
- mfTheURL[0] = 0;
- }
- else if (MyCompareStr(string, "<A "))
- {
- if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
- {
- mfCurrentState = kncHasLink;
- mfTheSubject[0] = 0;
- }
- else mfCurrentState = kncParsing;
- }
- break;
- case kncHasLink: //if we hit another tage when in a has link,
- mfCurrentState = kncParsing; //abort
- break;
- case kncHasHeader: //for this, only waiting for </H2>
- if (MyCompareStr(string, "</H2>"))
- mfCurrentState = kncHasNotHeader;
- else mfCurrentState = kncParsing;
- break;
- case kncHasNotHeader: //for this, waiting for <a>
- if (MyCompareStr(string, "<A "))
- {
- if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
- {
- AddEntry(mfTheSubject, mfTheURL);
- mfCurrentState = kncParsing;
- }
- else mfCurrentState = kncParsing;
- }
- break;
- }
- }
- else
- {
- if ((mfCurrentState==kncHasHeader) //OK, get got a headline!
- ||(mfCurrentState==kncHasLink))
- {
- if (numchars>255)
- numchars = 255;
- mfTheSubject[0] = numchars;
- BlockMove(string, &mfTheSubject[1], numchars);
-
- //skip some extraneous CNN stuff
- if (EqualString(mfTheSubject, "\pIMPACT", false, false))
- mfCurrentState = kncParsing;
- if (EqualString(mfTheSubject, "\pF U L L S T O R Y", false, false))
- mfCurrentState = kncParsing;
- if (EqualString(mfTheSubject, "\pTEXT - ONLY VERSION", false, false))
- mfCurrentState = kncParsing;
-
- if (mfCurrentState==kncHasLink)
- {
- //Add the entry
- AddEntry(mfTheSubject, mfTheURL);
- mfCurrentState = kncParsing;
- }
- }
- }
- }
-
- void LoadCNN(sMyDataPtr gGlobalsPtr)
- {
- CNNExtractor* theparser = new CNNExtractor(gGlobalsPtr);
-
- theparser->ReadEntries();
- delete theparser;
-
- InitCursor();
- }
-
- // This reloads us if necessary
- Boolean MustReloadCNN(sMyDataPtr gGlobalsPtr)
- {
- unsigned long now;
-
- GetDateTime(&now);
-
- if (now<gCNNNextTime) //time to check yet?
- return false;
-
- return true; //always recheck on the time
- }